import stable_baselines3
import winsound
import pprint
import numpy as np
import cv2
import gym
import highway_env
from stable_baselines3 import DQN, DDPG, PPO
from highway_env.vehicle.kinematics import Performance, Logger

import tensorboard
import torch

# Environment id; reused below to name the video file, the log directory
# and the model checkpoint paths.
situation = "racetrack-v0"

# (width, height) in pixels, shared by the video writer and the env config.
frameSize = (640, 280)

# Video writer for the recorded evaluation run (DIVX codec, rate argument 30).
fourcc = cv2.VideoWriter_fourcc(*'DIVX')
out = cv2.VideoWriter(f'video-DDPG-{situation}.avi', fourcc, 30, frameSize)

# Build the environment and override part of its default configuration.
env = gym.make(situation)
screen_width, screen_height = frameSize
env.configure({
    "screen_width": screen_width,
    "screen_height": screen_height,
    "normalize_reward": False,
    # "offroad_terminal": True,  (left disabled)
    "simulation_frequency": 30,
    "policy_frequency": 30,
})
env.reset()

# Dump the resulting configuration for inspection.
pprint.pprint(env.config)

# Training switch: 1 trains a fresh DDPG agent, 0 resumes training from the
# checkpoint stored at "<situation>/model".
new = 1

if new == 1:
    model = DDPG(
        'MlpPolicy', env,
        policy_kwargs=dict(net_arch=[256, 256, 256]),
        learning_rate=0.00001,  # 0.001 #5e-4
        buffer_size=int(1e6),
        # Warm-up (random actions, no gradient updates) must be shorter than
        # the training budget below. The previous value of 1e5 exceeded the
        # whole 50k-step budget, so the agent was never actually trained.
        learning_starts=500,
        batch_size=128,  # 128 worked better for racetrack
        tau=0.005,  # between 0 and 1
        gamma=0.99,
        train_freq=(1, 'episode'),
        # Gradient steps after each rollout (see train_freq); -1 means as
        # many gradient steps as environment steps done during the rollout.
        gradient_steps=1,
        # NOTE(review): this exploration noise has a non-zero mean (0.5),
        # which biases every action in one direction - confirm it is intended.
        action_noise=stable_baselines3.common.noise.NormalActionNoise(0.5, 0.25),
        # Memory-efficient replay buffer variant, at a cost of more complexity.
        optimize_memory_usage=False,
        # 0: no output, 1: info messages, 2: debug messages.
        verbose=1,
        # 'auto' would pick the GPU when one is available.
        device='cpu',
        tensorboard_log=situation + "_ddpg/",
    )

    # (additional steps, checkpoint name); cumulative totals are 1k, 10k, 50k.
    stages = (
        (1000, "model_1k"),
        (9000, "model_10k"),
        (40000, "model_50k"),
    )
    for stage_index, (extra_steps, checkpoint) in enumerate(stages):
        # Only the first stage starts the timestep counter from zero; later
        # stages continue it so the warm-up phase is not replayed each time
        # and the checkpoint names match the real amount of training.
        model.learn(extra_steps, progress_bar=True,
                    reset_num_timesteps=(stage_index == 0))
        # Save into "<situation>/", the directory the evaluation code loads
        # "model_50k" from (previously saved under "<situation>ddpg/").
        model.save(situation + "/" + checkpoint)

    print("\n Done Learning!! \n")

elif new == 0:
    model = DDPG.load(situation + "/model")
    model.set_env(env)
    # Continue the saved timestep counter instead of restarting from zero.
    model.learn(int(1e4), progress_bar=True, reset_num_timesteps=False)
    model.save(situation + "/model")
    print("\n Done Learning!! \n")
    winsound.MessageBeep()

# Driving-performance evaluation of the 50k-step checkpoint.
# Performance and Logger are imported from highway_env.vehicle.kinematics;
# their implementation is not part of this file.
perfm = Performance()
lolly = Logger()

model = DDPG.load(situation + "/model_50k")
number_of_runs = 3
for f in range(number_of_runs):
    done = truncated = False
    obs, info = env.reset()

    # First controlled vehicle of the freshly reset episode.
    ego_car = env.controlled_vehicles[0]

    # Stop when the episode terminates OR is truncated, or once the vehicle
    # has slowed to a speed of 2 or less. Without the truncation check the
    # loop kept stepping an already finished episode and could run forever
    # if the vehicle never crashed or slowed down.
    while not (done or truncated) and ego_car.speed > 2:
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, truncated, info = env.step(action)

        # Log the ego vehicle after every step.
        lolly.file(ego_car)

    # Hand this run's log to the performance tracker, then clear the log
    # before the next run.
    perfm.add_measurement(lolly)
    lolly.clear_log()

perfm.print_performance()
print('DONE2')

# ---------- Load the saved model, replay it and record a video ----------
model = DDPG.load(situation + "/model_50k")
number_of_collisions = 0
try:
    for f in range(15):
        done = truncated = False
        obs, info = env.reset()

        while not (done or truncated):
            action, _states = model.predict(obs, deterministic=True)
            obs, reward, done, truncated, info = env.step(action)

            if info.get('crashed'):
                number_of_collisions += 1
            env.render()
            cur_frame = env.render(mode="rgb_array")
            # The frame is requested as an RGB array, but cv2.VideoWriter
            # expects BGR channel order; convert so the recorded colours
            # are not swapped.
            out.write(cv2.cvtColor(cur_frame, cv2.COLOR_RGB2BGR))
finally:
    # Always finalise the video file, even if an episode raises.
    out.release()

# Report the collision count, which was previously computed but never shown.
print('Collisions:', number_of_collisions)
print('DONE_end')

